
/* 

Paper: Gentrification and pioneer businesses 
Authors: Behrens, Boulam, Martin, Mayneris 

Name dofile: identify_pioneers_med.do  
Version: 11 nov. 2021 

Output: dataset pioneers_final_negbin_med.dta with the baseline list of pioneer sectors

Inputs:

 - pioneer_gentri90_only`n'_negbin_`v'_1990_med.dta with n in {250, 500, cont} and v in {pc, sh_edu}, and pioneer_gentri90_only`n'_negbin_gentri_1990.dta with n in {250, 500, cont} [produced in compute_elasticity_negbin.do] [publicly available]

*/ 

// For each setting n (250, 500, cont) and each measure v (pc, sh_edu), load the
// corresponding "_med" estimates, flag coefficients significant at the 1% level
// (|coef/se| > 2.576), build the pioneer dummy, and save one file per (n, v).
foreach n in "250" "500" "cont" {
	foreach v in "pc" "sh_edu" {
		use pioneer_gentri90_only`n'_negbin_`v'_1990_med.dta, clear

		// Significance dummies.
		// Stata treats missing as larger than any number, so a missing t-ratio
		// (missing coefficient, missing standard error, or zero standard error)
		// would satisfy "> 2.576". Guarding on the ratio itself excludes all of
		// these cases, including extended missing codes (.a-.z).
		gen siginit`n' = abs(coef_`v'_gr_poor_init`n' / se_`v'_gr_poor_init`n') > 2.576 ///
			& !missing(coef_`v'_gr_poor_init`n' / se_`v'_gr_poor_init`n')
		gen siginit_pcinc`n' = abs(coef_poor_init`n' / se_poor_init`n') > 2.576 ///
			& !missing(coef_poor_init`n' / se_poor_init`n')

		// Pioneer dummy: significantly negative coef_poor_init AND significantly
		// positive coef_`v'_gr_poor_init.
		gen pioneer_`n'_`v'_high = ( ///
			coef_poor_init`n' < 0 & siginit_pcinc`n' == 1 ///
			& coef_`v'_gr_poor_init`n' > 0 & siginit`n' == 1)

		save pioneers`n'_negbin_`v'_med.dta, replace
	}
}
	
// For each setting n (250, 500, cont) and the measure v = gentri, load the
// estimates, flag coefficients significant at the 1% level (|coef/se| > 2.576),
// build the pioneer dummy, and save one file per n.
foreach n in "250" "500" "cont" {
	foreach v in "gentri" {
		// NOTE(review): unlike pc / sh_edu, this input has no "_med" suffix. The
		// original comment read "computing the weighted mean or the median is
		// relevant"; presumably "not relevant" was meant - confirm.
		use pioneer_gentri90_only`n'_negbin_`v'_1990.dta, clear

		// Significance dummies.
		// Stata treats missing as larger than any number, so a missing t-ratio
		// (missing coefficient, missing standard error, or zero standard error)
		// would satisfy "> 2.576". Guarding on the ratio itself excludes all of
		// these cases, including extended missing codes (.a-.z).
		gen siginit`n' = abs(coef_`v'_init`n' / se_`v'_init`n') > 2.576 ///
			& !missing(coef_`v'_init`n' / se_`v'_init`n')
		gen siginit_pcinc`n' = abs(coef_poor_init`n' / se_poor_init`n') > 2.576 ///
			& !missing(coef_poor_init`n' / se_poor_init`n')

		// Pioneer dummy: significantly negative coef_poor_init AND significantly
		// positive coef_`v'_init.
		gen pioneer_`n'_`v'_high = ( ///
			coef_poor_init`n' < 0 & siginit_pcinc`n' == 1 ///
			& coef_`v'_init`n' > 0 & siginit`n' == 1)

		save pioneers`n'_negbin_`v'.dta, replace
	}
}

	// Combine the nine specification-level pioneer dummies (n in 250/500/cont
	// crossed with v in pc/sh_edu/gentri) into one dataset keyed on naics, count
	// in how many specifications each sector is a pioneer, and keep the final flag.
	use pioneers250_negbin_gentri.dta, clear
	foreach bw in "500" "cont" {
		merge 1:1 naics using pioneers`bw'_negbin_gentri.dta, nogenerate
	}
	foreach bw in "250" "500" "cont" {
		foreach meas in "pc" "sh_edu" {
			merge 1:1 naics using pioneers`bw'_negbin_`meas'_med.dta, nogenerate
		}
	}

	// Number of specifications in which the sector is a pioneer. A sector that is
	// absent from any of the nine files has a missing dummy, so its sum is missing.
	gen var_pioneer_high = 0
	foreach meas in "pc" "sh_edu" "gentri" {
		foreach bw in "250" "500" "cont" {
			replace var_pioneer_high = var_pioneer_high + pioneer_`bw'_`meas'_high
		}
	}

	// Final flag: pioneer in strictly more than 5 (i.e. at least 6) of the 9
	// estimations; sectors with a missing count are set to 0.
	// NOTE(review): the original comment said "at least 5 out of the 9", which
	// would be ">= 5". The code's threshold is kept unchanged - confirm which
	// one is intended.
	gen pioneer_high_negbin_med = (var_pioneer_high > 5 & !missing(var_pioneer_high))

	keep naics* pioneer_high_negbin_med
	save pioneers_final_negbin_med.dta, replace
	
	
	